import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Suppress warnings so the exploratory output stays readable.
from warnings import filterwarnings
filterwarnings('ignore')

# ---------------------------------------------------------------------------
# Zomato Bengaluru dataset -- column description
# ---------------------------------------------------------------------------
#  1. url                          url of the restaurant on the Zomato website
#  2. address                      address of the restaurant in Bengaluru
#  3. name                         name of the restaurant
#  4. online_order                 whether online ordering is available or not
#  5. book_table                   whether a table-booking option is available
#  6. rate                         overall rating of the restaurant out of 5
#  7. votes                        total number of ratings for the restaurant
#  8. phone                        phone number of the restaurant
#  9. location                     neighbourhood in which the restaurant is located
# 10. rest_type                    restaurant type
# 11. dish_liked                   dishes people liked in the restaurant
# 12. cuisines                     food styles, separated by commas
# 13. approx_cost(for two people)  approximate cost of a meal for two people
# 14. reviews_list                 list of (rating, review) tuples for the restaurant
# 15. menu_item                    list of menu items available in the restaurant
# 16. listed_in(type)              type of meal
# 17. listed_in(city)              neighbourhood in which the restaurant is listed
# --- Load dataset and basic exploration --------------------------------------
df = pd.read_csv(r'C:\FormMyConputer\Data Analysis\3-Zomato Data Analysis\zomato.csv')
df.head()
df.shape
df.dtypes
len(df['name'].unique())
df.isna().sum()

# Columns that contain at least one missing value.
feature_na = [feature for feature in df.columns if df[feature].isnull().sum() > 0]
feature_na

# Percentage of missing values per affected column.
for feature in feature_na:
    print('{} has {} % missing values'.format(
        feature, np.round(df[feature].isnull().sum() / len(df) * 100, 4)))

# --- Clean the 'rate' column --------------------------------------------------
df['rate'].unique()
df.dropna(axis='index', subset=['rate'], inplace=True)
df.shape


def extract_rating(value):
    """Return the part of a rating string before the '/', e.g. '4.1/5' -> '4.1'."""
    return value.split('/')[0]


df['rate'] = df['rate'].apply(extract_rating)
df.head()
df['rate'].unique()

# BUG FIX: the original called df.replace('NEW', 0) and df.replace('-', 0) on
# the WHOLE frame, which would also clobber cells equal to 'NEW' or '-' in any
# other column (names, phone numbers, ...).  Restrict the replacement to 'rate'.
df['rate'] = df['rate'].replace(['NEW', '-'], 0)
df['rate'] = df['rate'].astype(float)

# Top 20 restaurants by mean rating.
df.groupby('name')['rate'].mean().nlargest(20).plot.bar()

df_rate = df.groupby('name')['rate'].mean().to_frame().reset_index()
df_rate.columns = ['restaurant', 'rating']
df_rate.head(20)
df_rate.shape

# Distribution of average ratings.
# NOTE: distplot is deprecated in recent seaborn; histplot/displot is the
# modern replacement.
sns.set_style(style='whitegrid')
sns.distplot(df_rate['rating'])

# More than 50 percent of the restaurants have a rating between 3 and 4.
# Restaurants having a rating of more than 4.5 are very rare.

# --- Which are the top restaurant chains in Bengaluru? ------------------------
plt.figure(figsize=(10, 7))
chains = df['name'].value_counts()[0:20]
sns.barplot(x=chains, y=chains.index, palette='deep')
plt.title("Most famous restaurants chains in Bangaluru")
plt.xlabel("Number of outlets")

# --- How many of the restaurants do not accept online orders? -----------------
x = df['online_order'].value_counts()
labels = ['accepted', 'not accepted']
# BUG FIX: the original computed `labels` but never passed it to plt.pie,
# so the slices were unlabelled.
plt.pie(x, labels=labels, explode=[0.0, 0.1], autopct='%1.1f%%')

# Same chart with plotly.  (Install plotly from a shell if it is missing:
# `pip install plotly` -- the original `!pip install` line is notebook-only
# syntax and a SyntaxError in a plain .py file.)
import plotly.express as px

fig = px.pie(df, values=x, names=labels, title='Pie chart')
fig.show()

# --- Ratio between restaurants that do and do not provide table booking -------
x = df['book_table'].value_counts()
labels = ['not book', 'book']
plt.pie(x, labels=labels, explode=[0.0, 0.1], autopct='%1.1f%%')

# Same chart with plotly graph objects.
import plotly.graph_objs as go
from plotly.offline import iplot

trace = go.Pie(labels=labels,
               values=x,
               hoverinfo='label+percent',
               textinfo='value',
               textfont=dict(size=25),
               # FIX: the original pull list had 5 entries for 2 slices;
               # pull the second slice out slightly.
               pull=[0, 0.2])
iplot([trace])

# --- rest_type missing values -------------------------------------------------
df['rest_type'].isna().sum()
# BUG FIX: Series.dropna(inplace=True) on df['rest_type'] operates on a
# temporary Series and does NOT remove the rows from df.  Drop via the frame.
df.dropna(axis='index', subset=['rest_type'], inplace=True)
df['rest_type'].isna().sum()
len(df['rest_type'].unique())

# How many types of restaurants do we have?
plt.figure(figsize=(20, 12))
df['rest_type'].value_counts().nlargest(20).plot.bar(color='red')
plt.gcf().autofmt_xdate()

# Same chart with plotly.
top_rest_types = df['rest_type'].value_counts().nlargest(20)
trace1 = go.Bar(x=top_rest_types.index, y=top_rest_types, name='rest_type')
iplot([trace1])

# Bengaluru is known as the tech capital of India; people leading busy, modern
# lives prefer Quick Bites -- that restaurant type clearly dominates.

# --- Restaurants with the highest number of votes -----------------------------
df.groupby('name')['votes'].max().nlargest(10).plot.bar()

top_votes = df.groupby('name')['votes'].max().nlargest(10)
trace1 = go.Bar(x=top_votes.index, y=top_votes, name='name')
iplot([trace1])

# --- Total restaurants at different locations of Bengaluru --------------------
df.groupby('location')['name'].unique()

restaurant = []
location = []
for key, loc_group in df.groupby('location'):
    location.append(key)
    restaurant.append(len(loc_group['name'].unique()))

df_total = pd.DataFrame(zip(location, restaurant))
df_total.columns = ['location', 'restaurant']
df_total.set_index('location', inplace=True)
df_total.sort_values(by='restaurant').tail(10)
df_total.sort_values(by='restaurant').tail(10).plot.bar()
df_total.sort_values(by='restaurant').tail(10).index

trace1 = go.Bar(x=df_total['restaurant'].nlargest(10).index,
                y=df_total['restaurant'].nlargest(10),
                name='Priority')
iplot([trace1])

df.isnull().sum()

# rest_type distribution expressed as a percentage of all rows.
(df['rest_type'].value_counts() / len(df)) * 100

# --- Most popular cuisines (e.g. North Indian, South Indian) -------------------
cuisines = df['cuisines'].value_counts()[:10]
# FIX: pass x/y by keyword -- positional data arguments were removed from
# seaborn.barplot in seaborn 0.12.
sns.barplot(x=cuisines, y=cuisines.index)
plt.xlabel('Count')
plt.title("Most popular cuisines of Bangalore")

trace1 = go.Bar(x=cuisines.index, y=cuisines, name='Cuisines')
iplot([trace1])

# North Indian, Chinese, South Indian and Biryani are the most common --
# Bengaluru appears more influenced by North Indian food culture than South.

# --- Clean 'approx_cost(for two people)' ---------------------------------------
len(df['approx_cost(for two people)'].value_counts())
df['approx_cost(for two people)'].isna().sum()
df.dropna(axis='index', subset=['approx_cost(for two people)'], inplace=True)
df['approx_cost(for two people)'].isna().sum()
df['approx_cost(for two people)'].unique()


def is_float(x):
    """Return True if x can be parsed as a float, else False."""
    try:
        float(x)
    except (TypeError, ValueError):
        return False
    return True


# Rows whose cost is not parseable as a number (they contain a thousands comma).
df[~df['approx_cost(for two people)'].apply(is_float)]
df['approx_cost(for two people)'].dtype
df['approx_cost(for two people)']
type(df['approx_cost(for two people)'][0])

# Strip the thousands separator and convert to int (vectorised str.replace
# instead of a per-row Python lambda).
df['approx_cost(for two people)'] = (
    df['approx_cost(for two people)'].str.replace(',', '').astype(int))

# --- Rating vs cost, split by online ordering ----------------------------------
plt.figure(figsize=(10, 7))
sns.scatterplot(x="rate", y='approx_cost(for two people)', hue='online_order', data=df)
plt.show()
# Most of the highest-rated restaurants accept online orders and are budget
# friendly too.

df.head()

# --- Do votes differ between restaurants with / without online ordering? ------
sns.boxplot(x='online_order', y='votes', data=df)
# The median number of votes differs between the two groups: restaurants
# accepting online orders get more votes, because a rating prompt pops up
# after each order placed through the Zomato application.

# --- Does price differ between restaurants with / without online ordering? ----
sns.boxplot(x='online_order', y='approx_cost(for two people)', data=df)

fig = px.box(df, x='online_order', y='approx_cost(for two people)')
fig.show()
# Restaurants accepting online orders are more affordable than restaurants
# that are NOT accepting online orders.  (Original note had the comparison
# garbled.)

# --- Cost extremes -------------------------------------------------------------
df['approx_cost(for two people)'].min()
df['approx_cost(for two people)'].max()
df[df['approx_cost(for two people)'] == 6000]
df[df['approx_cost(for two people)'] == 6000]['name']

plt.figure(figsize=(6, 6))
sns.distplot(df['approx_cost(for two people)'])
plt.show()
px.histogram(df, x="approx_cost(for two people)")
# Most prices lie under 1000: most restaurants are affordable, very few are
# luxurious.

# Which restaurant serves the most costly meal for two, and which cuisines /
# liked dishes is it known for?
df[df['approx_cost(for two people)'] == 6000][['name', 'cuisines', 'dish_liked']]

data = df.copy()
data.dtypes
data.set_index('name', inplace=True)

# --- Top 10 most expensive restaurants (approx cost for two) -------------------
data['approx_cost(for two people)'].nlargest(10).plot.bar()
trace1 = go.Bar(x=data['approx_cost(for two people)'].nlargest(10).index,
                y=data['approx_cost(for two people)'].nlargest(10),
                name='Priority')
iplot([trace1])

# --- Top 10 cheapest restaurants (approx cost for two) -------------------------
data['approx_cost(for two people)'].nsmallest(10).plot.bar()
trace1 = go.Bar(x=data['approx_cost(for two people)'].nsmallest(10).index,
                y=data['approx_cost(for two people)'].nsmallest(10),
                name='Priority')
iplot([trace1])

# --- Top 10 cheapest restaurants, location-wise --------------------------------
# Switch the working copy to a location index so the cheapest-restaurant
# listing is shown per neighbourhood.
data.set_index('location', inplace=True)
data['approx_cost(for two people)'].nsmallest(10)

# --- Budget restaurants (cost for two <= 500) ----------------------------------
data[data['approx_cost(for two people)'] <= 500]

df_budget = data.loc[data['approx_cost(for two people)'] <= 500,
                     'approx_cost(for two people)']
df_budget = df_budget.reset_index()
df_budget.head()

df_budget['approx_cost(for two people)'].value_counts().plot.bar()
# 300- and 400-rupee meals are the most frequent budget price points.

trace1 = go.Bar(x=df_budget['approx_cost(for two people)'].value_counts().index,
                y=df_budget['approx_cost(for two people)'].value_counts(),
                name='Priority')
iplot([trace1])

# --- Restaurants rated >= 4 that are also under budget (<= 500) ----------------
df[(df['rate'] >= 4) & (df['approx_cost(for two people)'] <= 500)].shape

df_new = df[(df['rate'] >= 4) & (df['approx_cost(for two people)'] <= 500)]
len(df_new['name'].unique())

# --- Number of such affordable restaurants per location ------------------------
location = []
total = []
for loc, loc_group in df_new.groupby('location'):
    location.append(loc)
    total.append(len(loc_group['name'].unique()))

len(location)
len(total)

# NOTE: renamed from the original's `location_df`, which shadowed the groupby
# loop variable of the same name.
location_counts = pd.DataFrame(zip(location, total))
location_counts.columns = ['location', 'restaurant']
location_counts.set_index('location', inplace=True)
location_counts.head(20)
type(location)

location_counts['restaurant'].nlargest(10).plot.bar()
plt.gcf().autofmt_xdate()
plt.ylabel('Total restaurants')

trace1 = go.Bar(x=location_counts['restaurant'].nlargest(10).index,
                y=location_counts['restaurant'].nlargest(10),
                name='Priority')
iplot([trace1])

# --- Names of those affordable restaurants per location ------------------------
location = []
total = []
for loc, loc_group in df_new.groupby('location'):
    location.append(loc)
    total.append(loc_group['name'].unique())

afford = pd.DataFrame(zip(location, total))
afford.columns = ['location', 'res_names']
afford.set_index('location', inplace=True)
afford.head()


# --- Finding the best budget restaurants in any location -----------------------
def return_budget(location, restaurant, max_cost=400, min_rate=4):
    """Return the unique names of highly rated budget restaurants.

    Parameters
    ----------
    location : str
        Neighbourhood to search in (matches the 'location' column).
    restaurant : str
        Restaurant type (matches the 'rest_type' column).
    max_cost : int, optional
        Maximum approximate cost for two people (default 400, as before).
    min_rate : float, optional
        Ratings must be strictly greater than this (default 4, as before).
    """
    budget = df[(df['approx_cost(for two people)'] <= max_cost)
                & (df['location'] == location)
                & (df['rate'] > min_rate)
                & (df['rest_type'] == restaurant)]
    return budget['name'].unique()


return_budget('BTM', "Quick Bites")

# --- Locations with the most restaurants ---------------------------------------
plt.figure(figsize=(10, 7))
Restaurant_locations = df['location'].value_counts()[:20]
# FIX: keyword arguments (positional data arguments removed in seaborn 0.12).
sns.barplot(x=Restaurant_locations, y=Restaurant_locations.index)

trace1 = go.Bar(x=Restaurant_locations.index, y=Restaurant_locations, name='Priority')
iplot([trace1])
# BTM, HSR and Koramangala 5th Block have the most restaurants;
# BTM dominates with more than 5000.

# --- Geocoding: latitude/longitude for each neighbourhood via geopy ------------
# (Install from a shell if missing: `pip install geopy` -- the original
# `!pip install` line is notebook-only syntax.)
df.shape
len(df['location'].unique())

locations = pd.DataFrame({"Name": df['location'].unique()})
# Prefix with the city so Nominatim resolves each neighbourhood unambiguously.
locations['new_Name'] = 'Bangalore ' + locations['Name']
locations.head()

from geopy.geocoders import Nominatim

lat_lon = []
geolocator = Nominatim(user_agent="app")
# NOTE: Nominatim enforces a rate limit; consider geopy's RateLimiter for
# large batches.  Loop variable renamed -- the original rebound `location`
# to the geocode result inside its own loop.
for place in locations['Name']:
    hit = geolocator.geocode(place)
    if hit is None:
        lat_lon.append(np.nan)
    else:
        lat_lon.append((hit.latitude, hit.longitude))

locations['geo_loc'] = lat_lon
locations.head()
# Cache the geocoding results so the slow network step need not be repeated.
locations.to_csv('zomato_locations.csv', index=False)

Rest_locations = pd.DataFrame(df['location'].value_counts().reset_index())
Rest_locations.columns = ['Name', 'count']
Rest_locations.head()
locations.head()
locations.shape
Rest_locations.shape

Restaurant_locations = Rest_locations.merge(locations, on='Name', how="left").dropna()
Restaurant_locations.head()
Restaurant_locations.shape
Restaurant_locations['count'].max()
type(Restaurant_locations['geo_loc'][0])


def generateBaseMap(default_location=[12.97, 77.59], default_zoom_start=12):
    """Return a folium map centred on Bengaluru (requires `import folium`)."""
    base_map = folium.Map(location=default_location, zoom_start=default_zoom_start)
    return base_map


len(Restaurant_locations['geo_loc'])
Restaurant_locations.isna().sum()
Restaurant_locations['geo_loc'][0][0]
Restaurant_locations['geo_loc'][0][1]
np.array(Restaurant_locations['geo_loc'])

# Unzip the (lat, lon) tuples into separate columns.
lat, lon = zip(*np.array(Restaurant_locations['geo_loc']))
type(lat)
Restaurant_locations['lat'] = lat
Restaurant_locations['lon'] = lon
Restaurant_locations.head()

# (Install from a shell if missing: `pip install folium`.)
import folium
from folium.plugins import HeatMap

basemap = generateBaseMap()
basemap
Restaurant_locations[['lat', 'lon', 'count']].values.tolist()
# FIX: dropped the invalid `zoom=20` keyword -- folium's HeatMap has no such
# parameter (use max_zoom if needed).
HeatMap(Restaurant_locations[['lat', 'lon', 'count']].values.tolist(),
        radius=15).add_to(basemap)
basemap

# Restaurants concentrate in central Bengaluru and thin out towards the edges;
# prospective restaurateurs can use this to scout good locations.
# A heatmap works well when each point has a lat/lon plus a weight/count of
# that particular place.
df.head()

# --- Where is North Indian cuisine concentrated? -------------------------------
# NOTE(review): this is an exact match on the 'cuisines' string, so
# multi-cuisine rows such as 'North Indian, Chinese' are excluded.
df2 = df[df['cuisines'] == 'North Indian']
df2.head()

north_india = df2.groupby(['location'], as_index=False)['url'].agg('count')
north_india.columns = ['Name', 'count']
north_india.head()

north_india = north_india.merge(locations, on="Name", how='left').dropna()
north_india.head()
north_india['lat'], north_india['lon'] = zip(*north_india['geo_loc'].values)
# FIX: the original called drop() without assigning the result, so 'geo_loc'
# was never actually removed (and the columns were misspelled 'lan').
north_india = north_india.drop(['geo_loc'], axis=1)

basemap = generateBaseMap()
HeatMap(north_india[['lat', 'lon', 'count']].values.tolist(),
        radius=15).add_to(basemap)
basemap

# --- Where is South Indian cuisine concentrated? -------------------------------
df3 = df[df['cuisines'] == 'South Indian']
# BUG FIX: the original grouped df2 (the North Indian frame) here by
# copy-paste mistake, so the "South Indian" heatmap actually showed North
# Indian counts.  Group df3 instead.
south_india = df3.groupby(['location'], as_index=False)['url'].agg('count')
south_india.columns = ['Name', 'count']
south_india = south_india.merge(locations, on="Name", how='left').dropna()
south_india['lat'], south_india['lon'] = zip(*south_india['geo_loc'].values)
south_india = south_india.drop(['geo_loc'], axis=1)
south_india.head()

basemap = generateBaseMap()
HeatMap(south_india[['lat', 'lon', 'count']].values.tolist(),
        radius=15).add_to(basemap)
basemap

# --- Which are the most popular casual dining restaurant chains? ---------------
# Count rows per (rest_type, name) pair, then keep the three biggest names
# within every rest_type.
df_1 = df.groupby(['rest_type', 'name']).agg('count')
df_1
datas = (df_1.sort_values(['url'], ascending=False)
         .groupby(['rest_type'], as_index=False)
         .apply(lambda g: g.sort_values(by="url", ascending=False).head(3))['url']
         .reset_index()
         .rename(columns={'url': 'count'}))
datas

# Full per-type ranking (no head(3) cut) for further inspection.
dataset = (df_1.sort_values(['url'], ascending=False)
           .groupby(['rest_type'], as_index=False)
           .apply(lambda g: g.sort_values(by="url", ascending=False))['url']
           .reset_index()
           .rename(columns={'url': 'count'}))
dataset

casual = dataset[dataset['rest_type'] == 'Casual Dining']
casual
df.shape
# Empire Restaurant, Beijing Bites and Mani's Dum Biriyani are the most
# popular casual dining restaurant chains in Bengaluru; we will inspect them
# further.
# --- Word clouds of liked dishes -----------------------------------------------
# (Install from a shell if missing: `pip install wordcloud` -- the original
# `!pip install` line is notebook-only syntax.)
from wordcloud import WordCloud, STOPWORDS

df.head()

# Turn the comma-separated dish_liked string into a list (empty for NaN).
df['update_dish_liked'] = df['dish_liked'].apply(
    lambda x: x.split(',') if type(x) == str else [''])
df.head()

df['rest_type'].value_counts()[:9].index
rest = df['rest_type'].value_counts()[:9].index

df.isna().sum()
df.dropna(axis='index', subset=['rest_type'], inplace=True)
df.dropna(axis='index', subset=['dish_liked'], inplace=True)
df.isna().sum()

data = df[df['rest_type'] == 'Quick Bites']
data['dish_liked']

stopwords = set(STOPWORDS)

# Build one lower-cased token string from all liked dishes
# (str.join instead of quadratic += concatenation).
dish_chunks = [' '.join(token.lower() for token in entry.split())
               for entry in data['dish_liked']]
dishes = ' '.join(dish_chunks) + ' '

wordcloud = WordCloud(max_font_size=None, background_color='white',
                      collocations=False, stopwords=stopwords,
                      width=1500, height=1500).generate(dishes)
plt.imshow(wordcloud)
plt.axis("off")


def produce_wordcloud(rest):
    """Draw a 3x3 grid of dish word clouds, one per restaurant type in `rest`."""
    plt.figure(figsize=(20, 30))
    for idx, restaurant in enumerate(rest):
        # NOTE: distinct loop variables -- the original reused `i` for both
        # the subplot index and the inner token index.
        plt.subplot(3, 3, idx + 1)
        subset = df[df['rest_type'] == restaurant]
        chunks = [' '.join(token.lower() for token in entry.split())
                  for entry in subset['dish_liked']]
        dishes = ' '.join(chunks) + ' '
        wordcloud = WordCloud(max_font_size=None, background_color='white',
                              collocations=False, stopwords=stopwords,
                              width=1500, height=1500).generate(dishes)
        plt.imshow(wordcloud)
        plt.title(restaurant)
        plt.axis("off")


stopwords = set(STOPWORDS)
produce_wordcloud(rest)

# --- Analysing the reviews of a particular restaurant type ---------------------
df.head()
df['reviews_list'][0]

import re


def _clean_review(text):
    """Lower-case a raw reviews_list string, strip non-letters, the word
    'rated', and the letter 'x' (left over from \\xNN unicode escapes; note
    this also strips 'x' inside real words, preserving the original
    behaviour), then collapse repeated spaces."""
    text = text.lower()
    text = re.sub('[^a-zA-Z]', ' ', text)
    text = re.sub('rated', ' ', text)
    text = re.sub('x', ' ', text)
    return re.sub(' +', ' ', text)


data = _clean_review(df['reviews_list'][0])
data

dataset = df[df['rest_type'] == 'Quick Bites']
type(dataset['reviews_list'][3])

# Concatenate the cleaned reviews of every Quick Bites restaurant
# (generator + join instead of quadratic += concatenation).
total_review = ' ' + ''.join(_clean_review(review)
                             for review in dataset['reviews_list'])

wordcloud = WordCloud(width=800, height=800, background_color='white',
                      stopwords=stopwords,
                      min_font_size=10).generate(total_review)
plt.figure(figsize=(8, 8))
plt.imshow(wordcloud)
plt.axis("off")


def importance(restaurant):
    """Show a word cloud of the cleaned reviews for one restaurant type."""
    subset = df[df['rest_type'] == restaurant]
    total_review = ' ' + ''.join(_clean_review(review)
                                 for review in subset['reviews_list'])
    wordcloud = WordCloud(width=800, height=800, background_color='white',
                          stopwords=set(STOPWORDS),
                          min_font_size=10).generate(total_review)
    plt.figure(figsize=(8, 8))
    plt.imshow(wordcloud)
    plt.axis("off")


importance('Quick Bites')